# NOTE: wipes every object (hidden ones included) from the global workspace
# so that the script starts from a clean slate.
rm(list = ls(all.names = TRUE))
library(ggplot2) # ggplot() and its layers are used for every figure below

# DATA AND PATHS
# --------------
# load() returns the names of the objects it restored; everything below
# relies on a data frame called `data` being among them, so fail early with
# a clear message if it is not.
loaded_objects <- load("./generated_data/3-budget_data_with_estimated_positions.RData")
if (!("data" %in% loaded_objects) || !is.data.frame(data)) {
    stop("'./generated_data/3-budget_data_with_estimated_positions.RData' ",
         "does not contain a data frame named 'data'", call. = FALSE)
}

# CASE SELECTION
# --------------
# only keep ministers who participated in the budget debate (i.e.,
# drop those for which estimated policy positions are not available)
data <- data[!is.na(data$ws.cab.rescaled), ]

# only keep unique observations (i.e., only keep one observation for
# junior ministers who are assigned to multiple portfolios)
data <- data[!duplicated(paste(data$memberID, data$debate.year)), ]


# PLOT RESCALED WORDSCORES: PM vs FM
# ----------------------------------
# d1: rows flagged as prime minister (pm) or finance minister (fm); they are
# drawn as connected, labelled series ("Reference text" in the legend).
d1 <- subset(data, data$pm == 1 | data$fm == 1)
d1$portfolio[d1$portfolio == "Enterprise"] <- "Taoiseach"
d1$portfolio <- as.factor(d1$portfolio)
# NOTE(review): the relabelling below depends on the factor levels sorting as
# <finance portfolio>, "Taoiseach" -- confirm against the data.
levels(d1$portfolio) <- c("Finance Minister", "Prime Minister")

# d2: all remaining cabinet members
d2 <- subset(data, !(data$pm == 1 | data$fm == 1))

# Per-year mean and median of the remaining members' scores. The data frames
# get their own names so that base::mean() and stats::median() are not masked
# by non-function objects of the same name.
other_mean <- data.frame(ws.cab.rescaled.mean = tapply(d2$ws.cab.rescaled, d2$budget.year, mean))
other_mean$budget.year <- rownames(other_mean)
other_mean$portfolio <- NA # needed so that this can be combined with below plot

other_median <- data.frame(ws.cab.rescaled.median = tapply(d2$ws.cab.rescaled, d2$budget.year, median))
other_median$budget.year <- rownames(other_median)
other_median$portfolio <- NA # needed so that this can be combined with below plot


# plot PM and FM scores as lines and points
p <- ggplot(d1, aes(x=as.factor(budget.year), y=ws.cab.rescaled, group=portfolio)) +
    theme_bw() +
    geom_line(aes(color=portfolio), size=1.5) +
    geom_point(aes(shape=portfolio), fill="black", size=3) +
    scale_shape_manual(values=c(15,17)) +
    xlab("Budget year") +
    ylab("Position") +
    theme(text = element_text(size=14)) +
    theme(legend.position = "bottom") +
    guides(
        shape = guide_legend(title = "Reference text:"),
        color = guide_legend(title = "Reference text:")) +
    labs(title="Estimated intra-cabinet positions over time")


# add all other cabinet member scores as points
p <- p + geom_point(data=d2, aes(x=as.factor(budget.year), y=ws.cab.rescaled), size=3)


# add mean and median score of all other cabinet members
# (the mean line is disabled; only the median is drawn)
## p <- p + geom_line(data=other_mean, aes(x=as.factor(budget.year),y=ws.cab.rescaled.mean), linetype="dashed", size=1)

p <- p + geom_line(data=other_median, aes(x=as.factor(budget.year),y=ws.cab.rescaled.median), linetype="dotted", size=1)


# write the figure to disk
pdf(file="./plots/figure1-wordscores_cabinet_rescaled.pdf", height=6, width=9)
print(p)
dev.off()


# PLOT ORIGINAL WORDSCORES: PM vs FM
# ----------------------------------
# Same figure as the rescaled version, but based on the ws.cab column.
# d1: rows flagged as prime minister (pm) or finance minister (fm); they are
# drawn as connected, labelled series ("Reference text" in the legend).
d1 <- subset(data, data$pm == 1 | data$fm == 1)
d1$portfolio[d1$portfolio == "Enterprise"] <- "Taoiseach"
d1$portfolio <- as.factor(d1$portfolio)
# NOTE(review): the relabelling below depends on the factor levels sorting as
# <finance portfolio>, "Taoiseach" -- confirm against the data.
levels(d1$portfolio) <- c("Finance Minister", "Prime Minister")


# d2: all remaining cabinet members
d2 <- subset(data, !(data$pm == 1 | data$fm == 1))

# Per-year mean and median of the remaining members' scores. The data frames
# get their own names so that base::mean() and stats::median() are not masked
# by non-function objects of the same name.
other_mean <- data.frame(ws.cab.mean = tapply(d2$ws.cab, d2$budget.year, mean))
other_mean$budget.year <- rownames(other_mean)
other_mean$portfolio <- NA # needed so that this can be combined with below plot

other_median <- data.frame(ws.cab.median = tapply(d2$ws.cab, d2$budget.year, median))
other_median$budget.year <- rownames(other_median)
other_median$portfolio <- NA # needed so that this can be combined with below plot


# plot PM and FM scores as lines and points
p <- ggplot(d1, aes(x=as.factor(budget.year), y=ws.cab, group=portfolio)) +
    theme_bw() +
    geom_line(aes(color=portfolio), size=1.5) +
    geom_point(aes(shape=portfolio), fill="black", size=3) +
    scale_shape_manual(values=c(15,17)) +
    xlab("Budget year") +
    ylab("Position") +
    theme(text = element_text(size=14)) +
    theme(legend.position = "bottom") +
    guides(
        shape = guide_legend(title = "Reference text:"),
        color = guide_legend(title = "Reference text:")) +
    labs(title="Estimated intra-cabinet positions over time")


# add all other cabinet member scores as points
p <- p + geom_point(data=d2, aes(x=as.factor(budget.year), y=ws.cab), size=3)


# add mean and median score of all other cabinet members
# (the mean line is disabled; only the median is drawn)
## p <- p + geom_line(data=other_mean, aes(x=as.factor(budget.year),y=ws.cab.mean), linetype="dashed", size=1)

p <- p + geom_line(data=other_median, aes(x=as.factor(budget.year),y=ws.cab.median), linetype="dotted", size=1)


# write the figure to disk
pdf(file="./plots/appendix_figure1.1-wordscores_cabinet.pdf", height=6, width=9)
print(p)
dev.off()


# PLOT ESTIMATED POSITIONS BY PORTFOLIO: WORDSCORES
# -------------------------------------------------
d <- data

# order portfolios by their average rescaled score; the index vector gets its
# own name so that base::order() is not masked
portfolio_order <- order(tapply(d$ws.cab.rescaled, d$portfolio, mean))

d$portfolio <- as.factor(d$portfolio)

d$portfolio <- factor(d$portfolio, levels(d$portfolio)[portfolio_order])
levels(d$portfolio)[levels(d$portfolio) == "Taoiseach"] <- "Prime minister"

# recode pm portfolio in 2001
d$portfolio[d$pm == 1 & d$budget.year == 2001] <- "Prime minister"

# drop PM and FM junior ministers because they will also be removed in the regression model
d <- d[!(d$portfolio == "Prime minister" & d$pm == 0), ]
d <- d[!(d$portfolio == "Finance" & d$fm == 0), ]


# different plot symbols for ministers and junior ministers
# Explicit levels/labels keep the labelling correct even if only one of the
# two categories is present; %in% treats a missing position as "Minister".
d$junior <- factor(d$position %in% "Minister of State",
                   levels = c(FALSE, TRUE),
                   labels = c("Minister", "Junior minister"))



# one panel per budget year, portfolios on the y axis
p <- ggplot(d, aes(x=ws.cab.rescaled, y=portfolio, shape=junior)) +
    theme_bw() +
    geom_point(aes(shape=junior),size=3) +
    facet_wrap(~ budget.year, ncol=4) +
    xlab("Position on intra-cabinet dimension") +
    ylab("") +
    labs(title="Estimated intra-cabinet positions by year and portfolio") +
    theme(text = element_text(size=16)) +
    theme(axis.title.x = element_text(vjust=-1)) +
    theme(legend.position = "bottom") +
    guides(
        shape = guide_legend(title = "Cabinet status:"))



# write the figure to disk
pdf(file="./plots/appendix_figure1.2-positions_by_portfolio_wordscores.pdf", height=16, width=14)
print(p)
dev.off()


# Average distance between speakers from the same dept
# The PM and Finance rows are the reference documents, so drop them first.
d <- d[d$portfolio != "Prime minister", ]
d <- d[d$portfolio != "Finance", ]


# per (year, portfolio) cell: highest score, lowest score, number of speakers
by_year_dept <- list(year = d$budget.year, portfolio = d$portfolio)

dept_max <- aggregate(d$ws.cab.rescaled, by = by_year_dept, FUN = max)
names(dept_max)[3] <- "max"

dept_min <- aggregate(d$ws.cab.rescaled, by = by_year_dept, FUN = min)
names(dept_min)[3] <- "min"

dept_obs <- aggregate(rep(1, nrow(d)), by = by_year_dept, FUN = sum)
names(dept_obs)[3] <- "obs"

# join the three summaries on their shared year/portfolio columns
a <- Reduce(merge, list(dept_max, dept_min, dept_obs))

table(a$obs)
# -> in 31 out of 141 cases more than one speaker from the same dept

# range of positions within each cell
a$dist <- a$max - a$min

# mean and sd of the range, restricted to cells with several speakers
several_speakers <- a$obs > 1
round(mean(a$dist[several_speakers]), 2)
round(sd(a$dist[several_speakers]), 2)

# Average distance between speakers from different dept across all years
by_year <- list(year = d$budget.year)

year_max <- aggregate(d$ws.cab.rescaled, by = by_year, FUN = max)
names(year_max)[2] <- "max"

year_min <- aggregate(d$ws.cab.rescaled, by = by_year, FUN = min)
names(year_min)[2] <- "min"


a <- merge(year_max, year_min)

# range of positions within each year
a$dist <- a$max - a$min

round(mean(a$dist), 2)
round(sd(a$dist), 2)
